#delimit ;

* Session setup. From this point every command and every star comment runs until a semicolon ;
set more off ;
clear ;
cd "replication" ;

* *************************************************************************** ;
* load data 
* *************************************************************************** ;

* start from the midline survey data ;
use "./data/midline.dta", clear ;

* *************************************************************************** ;
* merge with baseline on Id to bring in completed_elem
* (the only baseline variable kept, used later for heterogeneity analysis)
* *************************************************************************** ;

merge 1:1 Id using "./data/baseline.dta", keepusing(completed_elem) ;

* every midline respondent must have a baseline record, so no master-only rows are allowed ;
assert inlist(_merge, 2, 3) ;

* baseline-only respondents (_merge == 2) have no midline record, so keep matched rows only ;
keep if _merge == 3 ;
drop _merge ;

* ************************************************************************ ;
* construct outcome variables 
* ************************************************************************ ;

* Each score below is the row mean of a block of midline_knowledge items.
* The a-b varlist ranges follow the variable order in the dataset, and the score
* variables are created in the order total, cleanliness, midwife, condom, which is
* also the order the later score* loop visits them.
* NOTE(review): reading the means as proportions correct assumes the items are
* coded 0/1 -- confirm against the codebook ;

* proportion correct: all questions (items 1 to 8) ;
egen score_tot = rowmean(midline_knowledge1-midline_knowledge8) ; 

* proportion correct: cleanliness (items 1 to 2) ;
egen score_cleanliness = rowmean(midline_knowledge1-midline_knowledge2) ;

* proportion correct: midwifery, maternal and infant health (items 3 to 6) ;
egen score_midwife = rowmean(midline_knowledge3-midline_knowledge6) ;

* proportion correct: condoms, aids, syphilis (items 7 to 8) ;
egen score_condom = rowmean(midline_knowledge7-midline_knowledge8) ;

* logical check: the total must equal the item-count-weighted average of the three
* topic scores (weights 2, 4 and 2 out of 8). The comparison is an exact equality.
* NOTE(review): rowmean averages over non-missing items only, so this identity
* presumably requires all eight items to be non-missing for every respondent -- confirm ;
assert score_tot == (2*score_cleanliness + 4*score_midwife + 2*score_condom)/8 ;

****************************************************************************** ;
* generate randomization strata and wave-class variable
****************************************************************************** ;

* one id per distinct combination of wave and class.
* NOTE(review): waveclass is not referenced again in the visible part of this file ;
egen waveclass = group(wave class) ;

* one id per distinct combination of wave, neighborhood, female and mfi.
* This id is the absorbed fixed effect in the regressions further down ;
egen strata = group(wave neighborhood female mfi) ;

****************************************************************************** ;
* label variables for the LaTeX tables
****************************************************************************** ;

* treatment indicators: these labels become the row names of the regression table ;
label variable healthonly "HEE" ;
label variable healthandpay "HEEC" ;

* aggregate scores: the labels are LaTeX shortstack markup, and the total score
* gets a blank label ;
label variable score_tot " " ;
label variable score_cleanliness "\shortstack[l]{Cleanliness \\ and Hygiene}" ;
label variable score_midwife "\shortstack[l]{Midwives,\\Maternal \\ \& Child \\ Health}" ;
label variable score_condom "\shortstack[l]{Condom,\\ AIDS, \\Syphilis}" ;

* individual knowledge items, one stacked label per question ;
label variable midline_knowledge1 "\shortstack[l]{Knows\\washing\\ hands\\with sand\\is not as\\effective\\as soap}" ;
label variable midline_knowledge2 "\shortstack[l]{Knows\\water\\strained\\with\\cloth\\is not\\safe\\to drink}" ;
label variable midline_knowledge3 "\shortstack[l]{Knows\\iron\\deficiency\\in pregnancy\\is avoided\\through\\diet}" ;
label variable midline_knowledge4 "\shortstack[l]{Knows\\sonography\\not\\essential\\in every\\pre-natal\\check-up}" ;
label variable midline_knowledge5 "\shortstack[l]{Knows\\child on\\exclusive\\breastfeeding\\can't be\\given water}" ;
label variable midline_knowledge6 "\shortstack[l]{Knows\\having a\\trained\\midwife\\is im-\\portant}" ;
label variable midline_knowledge7 "\shortstack[l]{Knows\\AIDS\\ is not\\ curable}" ;
label variable midline_knowledge8 "\shortstack[l]{Knows\\syphilis\\can be\\contracted\\through\\unpro-\\tected sex}" ;

****************************************************************************** ;
* regressions
****************************************************************************** ;

* remove stored estimates left over from an earlier run (capture swallows the error when none exist) ;
capture estimates drop score* ;

foreach outcome of varlist score* { ;

	* regress the score on both treatment dummies, absorbing strata, with robust standard errors.
	* The model is stored under the name of the outcome variable ;
	areg `outcome' healthonly healthandpay, absorb(strata) vce(robust) ;
	quietly estimates store `outcome' ;

	* test equality of the HEE and HEEC coefficients and attach F and p to the stored model.
	* titlerow is an empty placeholder that only provides a heading row in the table footer ;
	test healthonly = healthandpay ;
	estadd local titlerow = "" ;
	estadd scalar fstat = r(F): `outcome' ;
	estadd scalar pval = r(p): `outcome' ;

	* control-group mean and SD of the outcome, restricted to the estimation sample ;
	summarize `outcome' if control == 1 & e(sample) == 1 ;
	estadd scalar cmean = r(mean): `outcome' ;
	estadd scalar csd = r(sd): `outcome' ;

} ;

* Export every stored model whose name matches score* to a LaTeX table, overwriting the file.
* The mgroups pattern 1 1 0 0 puts the All Topics header over column 1 and the By Topic
* header over columns 2 to 4, and the tabular preamble declares one label column plus four
* result columns.
* Each cell shows the coefficient (format %9.3f, with stars at 0.10, 0.05 and 0.01) and the
* standard error in parentheses. The constant is dropped.
* The footer rows come from the scalars added in the regression loop: the HEE = HEEC test
* (F and p), the control mean and SD, and N.
* NOTE(review): the table note describes strata as gender, neighborhood and microfinance
* client status, while the strata variable created earlier in this file is also grouped
* on wave -- confirm which description is intended ;
esttab score* using "./output/table-midline-knowledge-aggregate.tex",
	replace
	drop(_cons) 
	cells(b(label() star fmt(%9.3f %9.3f)) se(par)) 
	star(* 0.10 ** 0.05 *** 0.01) 
	stats(titlerow fstat pval cmean csd N, fmt(%9s %9.3f %9.3f %9.3f %9.3f %9.0f) labels("\(F\)-test, HEE = HEEC" "$\qquad$ \(F\)-statistic" "$\qquad$ \(p\)-value" "Control Mean" "Control SD" "N"))
prehead(\begin{table}[htbp] \centering \normalsize `"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
		\captionsetup{justification=centering} 
		\caption{Effects on Short-Term Health Knowledge}
		\label{table-midline-knowledge-aggregate}
		\begin{tabular*}{0.85\hsize}{p{4cm}p{1.9cm}p{1.9cm}p{1.9cm}p{1.9cm}}
		\toprule)
	legend label  booktabs  collabels( , none)
	mgroups(
	"All Topics" 
	"By Topic" 
	, pattern(1 1 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span}))
	postfoot(`"\bottomrule"'  \end{tabular*} \captionsetup{justification=justified, width=0.85\hsize} 
		\caption*{\footnotesize \textit{Notes:} 
		Outcomes are from a health knowledge test three weeks after the final film screening.
		The dependent variables are the proportion of correct answers in all test questions (Column 1) or 
		in a given subset of questions by topic (Columns 2 to 4).
		Regression results for individual test questions are presented in
		Supplementary Material Table \ref{table-midline-knowledge-by-question}. 
		\textit{HEE} is a dummy for assignment to only health entertainment-education. 
		\textit{HEEC} is a dummy for assignment to health entertainment-education with cash incentives for test performance. 
		Robust standard errors. 
		All regressions include dummies for randomization strata, where strata are defined by gender, neighborhood, and microfinance client status.
		***\$\,p < 0.01$, **\$\,p < 0.05$, *\$\,p<0.10$.
		}
		\end{table}) ;

* stop executing the do-file here ;
exit ;
